In [14]:
#import necessary Python libraries
import pandas as pd
import folium
import datetime
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt 

# Load the yearly crime extracts published on Kaggle:
# https://www.kaggle.com/datasets/onlyrohit/crimes-in-chicago
df1, df2, df3 = (pd.read_csv(path) for path in ('2020.csv', '2021.csv', '2022.csv'))

# Population of each community area and the region it belongs to, from Wikipedia:
# https://en.wikipedia.org/wiki/Community_areas_in_Chicago
# NOTE(review): the file has a .csv extension but is opened with read_excel;
# presumably it is an Excel workbook saved under a misleading name. Confirm.
pop = pd.read_excel('chicagopop.csv')
In [15]:
# Stack the three yearly extracts into a single frame in one call.
# The per-year frames are left untouched, so re-running this cell cannot append
# the same year twice, and ignore_index=True gives every row a unique label
# instead of repeating each file's own 0..n-1 index.
df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

# Drop the columns that the analysis does not use.
df = df.drop(columns=['Case Number','IUCR','Description', 'Beat', "District", "Ward", "FBI Code", "X Coordinate", "Y Coordinate", "Updated On",'Location'])

# Discard every row that still has a missing value in any remaining column.
df = df.dropna()
In [16]:
pop.head(3)  # preview the first three rows to confirm the population table loaded
Out[16]:
Name Population Area
0 Rogers Park 55628 Far North Side
1 West Ridge 77122 Far North Side
2 Uptown 57182 Far North Side
In [17]:
# Preview the first three rows of the crime records.
df.head(3)
Out[17]:
ID Date Block Primary Type Location Description Arrest Domestic Community Area Year Latitude Longitude
0 12016034 01/01/2020 12:00:00 AM 018XX N WINNEBAGO AVE DECEPTIVE PRACTICE APARTMENT False False 22.0 2020 41.915306 -87.686639
1 12220321 01/01/2020 12:00:00 AM 091XX S DREXEL AVE OFFENSE INVOLVING CHILDREN RESIDENCE False True 47.0 2020 41.728192 -87.600985
2 12013828 01/01/2020 12:00:00 AM 044XX S LAVERGNE AVE CRIMINAL SEXUAL ASSAULT APARTMENT False False 56.0 2020 41.812274 -87.748177
In [18]:
# The population table has no community-area number of its own. Its rows appear
# to be listed in community-area order (row 0 = area 1, row 1 = area 2, ...), so
# the number is derived from the row position. TODO: confirm against the source table.
# Any 'Community Area' column left over from a previous run is dropped first so
# that re-running this cell does not create a duplicate column.
pop = pop.drop(columns='Community Area', errors='ignore').reset_index(drop=True)
pop.insert(0, 'Community Area', pop.index + 1)  # 1-based number, kept as the first column
In [19]:
# NOTE: this import rebinds the name `datetime` from the module (imported in the
# first cell) to the class; it is kept in case other cells rely on that binding.
from datetime import datetime

# Parse the timestamp strings once, vectorised, instead of calling strptime
# twice per row. The 'Date' column itself is left as text.
parsed_dates = pd.to_datetime(df['Date'], format='%m/%d/%Y %I:%M:%S %p')

# Hour of day and calendar month of each record, stored in the new columns
# 'Hour' and 'Month'. The cast keeps the int64 dtype that the previous
# row-wise apply() produced.
df['Hour'] = parsed_dates.dt.hour.astype('int64')
df['Month'] = parsed_dates.dt.month.astype('int64')
In [20]:
# Attach each community area's name, population and region to every crime record,
# so all the data lives in the df dataframe. The inner join keeps only records
# whose 'Community Area' number is present in pop.
df = df.merge(pop, on='Community Area', how='inner')
In [21]:
# Preview the merged frame: the crime records now carry Hour, Month, Name, Population and Area.
df.head(3)
Out[21]:
ID Date Block Primary Type Location Description Arrest Domestic Community Area Year Latitude Longitude Hour Month Name Population Area
0 12016034 01/01/2020 12:00:00 AM 018XX N WINNEBAGO AVE DECEPTIVE PRACTICE APARTMENT False False 22.0 2020 41.915306 -87.686639 0 1 Logan Square 71665 North Side
1 11950278 01/01/2020 12:01:00 AM 038XX W FULLERTON AVE OTHER OFFENSE RESIDENCE False True 22.0 2020 41.924536 -87.722407 0 1 Logan Square 71665 North Side
2 11939212 01/01/2020 01:20:00 AM 020XX N CALIFORNIA AVE CRIMINAL DAMAGE CONVENIENCE STORE True False 22.0 2020 41.917654 -87.697205 1 1 Logan Square 71665 North Side
In [22]:
import folium
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster

# Plain base map centred on Chicago (latitude, longitude).
m = folium.Map(location=[41.881, -87.623], zoom_start=14)

# Base map used for the city-wide crime heat map.
# NOTE(review): the "Stamen Toner" tile set may not be available in newer folium
# releases; verify against the installed version.
crime_heatmap = folium.Map(
    location=[41.881, -87.623],
    tiles="Stamen Toner",
    zoom_start=12,
)

# Coordinates of every record in df.
# NOTE(review): despite the name, no filter on the year is applied here; confirm
# whether only 2022 was intended.
latlon_2022 = df[['Latitude', 'Longitude']]

heat_layer = HeatMap(latlon_2022, min_opacity=0.05)
heat_layer.add_to(crime_heatmap)
crime_heatmap
Out[22]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [23]:
# Number of crime records per neighbourhood (non-null count of 'ID' within each 'Name').
f = df.groupby('Name').count()
areaname = (
    f[['ID']]
    .reset_index()
    .rename(columns={'ID': 'Crime Records'})
)

# Combine the counts with the population table, one row per neighbourhood.
neighbourhoods = pop.merge(areaname, on='Name', how='inner')

# Records per resident, and the same figure scaled to 100,000 residents.
# NOTE(review): the count covers every row in df (all loaded years) and counts
# crime records rather than distinct victims; confirm that this is the intended
# meaning of the column names.
records_per_resident = neighbourhoods['Crime Records'] / neighbourhoods['Population']
neighbourhoods['Crime Rate'] = records_per_resident
neighbourhoods['Victims per 100.000 People'] = records_per_resident * 100000
In [24]:
# The five neighbourhoods with the highest crime rate.
neighbourhoods.sort_values('Crime Rate',ascending = False).head(5)
Out[24]:
Community Area Name Population Area Crime Records Crime Rate Victims per 100.000 People
25 26 West Garfield Park 17433 West Side 12542 0.719440 71944.014226
36 37 Fuller Park 2567 South Side 1798 0.700429 70042.851578
67 68 Englewood 24369 South Side 14100 0.578604 57860.396405
28 29 North Lawndale 34794 West Side 19353 0.556217 55621.658907
68 69 Greater Grand Crossing 31471 Far Southwest Side 16963 0.539004 53900.416256
In [25]:
# The five neighbourhoods with the lowest crime rate (end of the descending sort).
neighbourhoods.sort_values('Crime Rate',ascending = False).tail(5)
Out[25]:
Community Area Name Population Area Crime Records Crime Rate Victims per 100.000 People
4 5 North Center 35114 North Side 3241 0.092299 9229.936777
9 10 Norwood Park 38303 Far North Side 3284 0.085737 8573.740960
73 74 Mount Greenwood 18628 Far Southwest Side 1480 0.079450 7945.028989
11 12 Forest Glen 19596 Far North Side 1542 0.078690 7868.952848
8 9 Edison Park 11525 Far North Side 751 0.065163 6516.268980
In [26]:
from folium import plugins
from folium.plugins import MarkerCluster

# Convert 'Date' to real timestamps. The explicit format makes the parsing
# unambiguous (month first, 12-hour clock) and avoids format guessing.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %I:%M:%S %p')

# Crimes recorded on 31 May 2020 in the Near West Side community area.
# normalize() truncates each timestamp to midnight, so the comparison is done
# on whole days without building Python date objects row by row.
on_may31 = df['Date'].dt.normalize() == pd.Timestamp('2020-05-31')
may31 = df[on_may31 & (df['Name'] == 'Near West Side')]

# Map points of the crime events: one circle per record, popup shows the crime type.
m2 = folium.Map([41.8668, -87.6664], zoom_start=14)  # latitude & longitude of Near West Side
for latitude, longitude, crime_type in zip(
    may31['Latitude'], may31['Longitude'], may31['Primary Type']
):
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=crime_type,
        fill_color="#3db7e4",
    ).add_to(m2)

# Heat-map layer built from the same points.
dfmatrix = may31[['Latitude', 'Longitude']].values
m2.add_child(plugins.HeatMap(dfmatrix, radius=15))

# Mark the centroid (mean latitude, mean longitude) of the selected events.
# Skipped when the selection is empty, because the mean is undefined there.
if not may31.empty:
    centroid_lat = may31['Latitude'].mean()
    centroid_lon = may31['Longitude'].mean()
    folium.CircleMarker(
        [centroid_lat, centroid_lon],
        radius=5,
        popup="CENTER LOCATION",
        color='black',
        fill_color="#3db7e4",
    ).add_to(m2)
m2
Out[26]:
Make this Notebook Trusted to load map: File -> Trust Notebook